# coding=utf-8

from lxml import etree
import urllib2
from bs4 import BeautifulSoup
import requests
from lxml import html
import datetime
from spider.models import CurrListedCorp,CreditInfo

import urllib

from spider.items import CreditInfoItem

# Ad-hoc script: query the bgcheck.cn firm-credit search page for one company
# name, pull two values out of the first result row with XPath, and print
# them wrapped in a CreditInfoItem.

# Company name used as the search keyword (a UTF-8 byte string under Python 2).
data1 = '深圳市全新好股份有限公司'
# The keyword is percent-encoded so it can be embedded in the query string.
url = 'http://www.bgcheck.cn/MemberCenter/FirmCredit/Search.html?Keywords='+urllib.quote(data1)

print(url)

# Earlier urllib2-based attempt with a browser User-Agent, kept for reference.
#req = urllib2.Request(url)
#req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:33.0) Gecko/20100101 Firefox/33.0')

# Form fields posted along with the request; the names look like an ASP.NET
# pager postback asking for page 4.
# NOTE(review): no __VIEWSTATE / __EVENTVALIDATION fields are sent. That is
# presumably why the author observed that these parameters have no effect --
# confirm against the live page before relying on paging.
data = {
    '__EVENTTARGET':'AspNetPagerPaging',
    '__VIEWSTATEGENERATOR':'F7914D64',
    '__EVENTARGUMENT': '4'
}

# An explicit timeout is required: Requests applies none by default, so an
# unresponsive server would otherwise block this script forever.
html_post = requests.post(url, data=data, timeout=30)

# Author's note (translated): "The parameters are passed in, but they have no
# effect."
'''参数传递进去，没效果'''
#print html_post.text

tree = html.fromstring(html_post.text)

# Both XPath queries target the first <li> of the first <ul> under the
# element with id "content1". xpath() returns a list of text nodes, which is
# empty when nothing matches.
corp_name = tree.xpath('//*[@id="content1"]/ul[1]/li[1]/a[1]/em/text()')

credit = tree.xpath('//*[@id="content1"]/ul[1]/li[1]/span/a[1]/text()')

print(corp_name)

print(credit)

# The item fields receive the raw XPath result lists, not single strings.
item = CreditInfoItem()
item['corp_name'] = corp_name
item['credit'] = credit

# Local date of the scrape, formatted as YYYYMMDD.
item['time'] = datetime.datetime.now().strftime(
    '%Y%m%d'
)

print(item)


